Predicting the Price of an Airbnb Listing

Jacob A. Torres


import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport

Exploratory Data Analysis

# Read the raw listings CSV from the local data directory into a DataFrame.
data_url = './data/ab_us_2020.csv'
df = pd.read_csv(filepath_or_buffer=data_url)

# Report the (rows, columns) shape, then preview the first five rows.
print(f"Dataset: {df.shape}")
df.head(n=5)
-------------------------------------------------------------
NameError                   Traceback (most recent call last)
<ipython-input-1-ad3891df160b> in <module>
      1 data_url = './data/ab_us_2020.csv'
----> 2 df = pd.read_csv(data_url)
      3 
      4 print(f"Dataset: {df.shape}")
      5 df.head()

NameError: name 'pd' is not defined
# Build a pandas-profiling report object for the loaded DataFrame. The result
# is not bound to a name; presumably this is the last expression of a notebook
# cell so that the report renders inline — TODO confirm.
ProfileReport(df)





Feature and Target Selection

# Drop the columns treated as irrelevant for predicting price.
# DataFrame.drop returns a new frame (inplace defaults to False), so no
# explicit copy of the full dataset is needed before dropping.
df = df.drop(
    columns=[
        'id', 'name', 'host_id', 'host_name',
        'neighbourhood_group', 'reviews_per_month', 'last_review',
    ]
)

# Feature and target matrices: `target` names the column to predict (y);
# every other remaining column is kept as a feature (X).
target = 'price'
X = df.drop(columns=[target])
y = df[target]

# Report the feature-matrix shape and preview its first rows.
print(f"Features: {X.shape}")
X.head()
Features: (226030, 9)
neighbourhood latitude longitude room_type minimum_nights number_of_reviews calculated_host_listings_count availability_365 city
0 28804 35.65146 -82.62792 Private room 1 138 1 0 Asheville
1 28801 35.59779 -82.55540 Entire home/apt 1 114 11 288 Asheville
2 28801 35.60670 -82.55563 Entire home/apt 30 89 2 298 Asheville
3 28806 35.57864 -82.59578 Entire home/apt 1 267 5 0 Asheville
4 28801 35.61442 -82.54127 Private room 30 58 1 0 Asheville